KIRC Analysis

In [1]:
cd ../src
/cellar/users/agross/TCGA_Code/TCGA/Pathway_Merge/src
In [2]:
import os as os
import pickle as pickle
import subprocess
import pandas as pd

from Processing.Tests import run_feature_matrix, SurvivalTest
In [3]:
pd.set_option('precision',3)
In [4]:
drugs = {'TKI': ['perifosine', 'sunitinib', 'sorafenib', 'pazopanib', 'sutent', 'tarceva','nexavaar',
         'sutent (sunitinib)', 'gefitinib', 'nexavar', 'bay-439006', 
         'azd', 'iressa', 'sorafenib - nexavar', 'axitinib', 'sunitinib (sutent)', 'tipifarnib',
         'tyrosine kinase inhibitor', 'votrient', 'zd6474'],
         'VEGF Ab': ['bevacizumab', 'avastin'],
         'mTORi': ['temsirolimus','everolimus','rad001','torisel','afinitor'],
         'IL2/IF': ['interferon', 'il-2','il-2 (high dose)','proleukin (il-2)',
          'interleukin-2','interferon-alpha','interferon alpha',
          'intron a', 'alpha interferon', 'proleukin'
          'roferon-a','il-2 thearpy (interleukin)','high dose interleukin-2',
          'ifn-alpha (intron)', 'interleukin 2-high dose', 'inf'],
          'Chemo': ['bortezomib', 'gemictiabine', '5-fluorouracil','capecitabine','gemzar','thalidomide','nab-rapamycin',
                    'capecitabin', 'gemcitabine','xeloda'],
          'Vaccine': ['oncophage', 'oncophage vaccine']}
drug_map = {drug:family for family, drugs in drugs.items() for drug in drugs}
In [5]:
result_path = '/scratch/TCGA/Firehose__2012_01_16/ucsd_analyses'
run = sorted(os.listdir(result_path))[1]
run = pickle.load(open('/'.join([result_path, run, 'RunObject.p']), 'rb'))
In [6]:
cancer = run.load_cancer('KIRC')
clinical = cancer.load_clinical()
global_vars = cancer.load_global_vars()
In [7]:
clinical.clinical.tumor_stage.value_counts()
Out[7]:
stge i      241
stge iii    121
stge iv      76
stge ii      49
In [8]:
mut = cancer.load_data('MAF')
mut.uncompress()
meth = cancer.load_data('Methylation')
cn = cancer.load_data('CN_broad')
cn.uncompress()
rna = cancer.load_data('mRNASeq')
rppa  = cancer.load_data('RPPA')

Section 1: Stratification Variables

Clinical Variables

In [9]:
stage = clinical.clinical.tumor_stage
stage = stage.map(lambda s: s.replace('stge', 'stage'))
stage.value_counts().sort_index().plot(kind='bar')
Out[9]:
<matplotlib.axes.AxesSubplot at 0x63c8410>
In [10]:
fig, axs = subplots(1,2, figsize=(12,4))
v = clinical.clinical.tumor_grade
v.value_counts().sort_index().plot(kind='bar', title=v.name, ax=axs[0]);
v = clinical.clinical.tumor_grade.dropna().map(lambda s: s[:2])
v.value_counts().sort_index().plot(kind='bar', title=v.name, ax=axs[1]);
axs[0].set_ylabel('# of Patients')
Out[10]:
<matplotlib.text.Text at 0x5a591d0>
In [11]:
age = clinical.clinical.age
by_stage = pd.DataFrame({s: age[stage[stage==s].index].describe() for s in stage.unique()})
all_stage = pd.Series(clinical.clinical.age.describe(), name='All')
by_stage.join(all_stage).astype(object)
Out[11]:
stage i stage ii stage iii stage iv All
count 241 49 120 76 486
mean 60.3 60.7 63.3 61.2 61.2
std 12.8 12.5 12.1 9.9 12.2
min 26.6 39.3 32.1 33.5 26.6
25% 51.4 49.5 56 55.7 52.2
50% 59.8 59.7 63.9 61.3 61.2
75% 70.2 69.8 72.9 66.2 70.4
max 90.1 86.5 88.7 84.2 90.1
In [12]:
age.hist()
ylabel('# of Patients')
xlabel('Age')
Out[12]:
<matplotlib.text.Text at 0x6202850>
In [13]:
clinical.clinical.lymphnode_stage.value_counts().plot(kind='bar')
ylabel('# of Patients')
xlabel('Status')
Out[13]:
<matplotlib.text.Text at 0x5b83610>
In [14]:
clinical.clinical.gender.value_counts().plot(kind='bar')
ylabel('# of Patients')
xlabel('Gender')
Out[14]:
<matplotlib.text.Text at 0x5c83490>
In [15]:
pd.crosstab(stage, clinical.clinical.calcium_level)
Out[15]:
calcium_level elevated low normal
tumor_stage
stage i 1 109 50
stage ii 0 16 15
stage iii 3 46 35
stage iv 6 22 30
In [16]:
s = pd.crosstab(stage, clinical.clinical.calcium_level).ix['stage iv']
s[['low','normal','elevated']].plot(kind='bar')
ylabel('# of Patients')
xlabel('Status')
Out[16]:
<matplotlib.text.Text at 0x5c83dd0>
In [17]:
pd.crosstab(stage, clinical.clinical.eastern_cancer_oncology_group.dropna())
Out[17]:
eastern_cancer_oncology_group 0 1 2
tumor_stage
stage i 18 2 2
stage ii 3 2 0
stage iii 13 3 0
stage iv 15 9 3
In [18]:
clinical.clinical.hemoglobin.value_counts().plot(kind='bar')
ylabel('# of Patients')
xlabel('Hemoglobin Level')
Out[18]:
<matplotlib.text.Text at 0x7f1c890>
In [81]:
import Reports.Figures as F
reload(F)
from Reports.Figures import *
from Processing.Tests import *
from Reports.NotebookTools import *
In [82]:
def draw_survival_curves(feature, surv, assignment=None, filename='tmp.png', show=False, 
                               title=True, labels=['No Mutation', 'Mutation'], 
                               colors=['blue','red'], ann=None, show_legend=True, q=.25):
    if assignment is None:
        num_panels = 1
    else:
        num_panels = len(assignment.unique())
        
    r.png(filename=filename, width=200*(num_panels+1), height=300, res=75)
        
    fmla = robjects.Formula('Surv(days, event) ~ feature')
    r.par(mfrow=r.c(1, num_panels))
    r.par(mar=r.c(4,5,4,1))
    r.par(xpd=True)
    
    if (get_vec_type(feature) == 'real') and (len(feature.unique()) > 5):
        colors=['blue','orange','red']
        if q == .5:
            labels=['Bottom 50%', 'Top 50%']
        else:
            labels=['Bottom {}%'.format(int(q*100)), 'Normal', 'Top {}%'.format(int(q*100))]
            
    ls = r.c(*colors)
    
    def plot_me(sub_f, label):
        if (get_vec_type(sub_f) == 'real') and (len(sub_f.unique()) > 5):
            sub_f = to_quants(sub_f, q=q)
        m = get_cox_ph_ms(surv, sub_f, return_val='model', formula=fmla)
        r_data = m.rx2('call')[2]
        s = survival.survdiff(fmla, r_data)
        p = str(s).split('\n\n')[-1].strip().split(', ')[-1]
        ls = r.c(*colors)
        
        
        r.plot(survival.survfit(fmla, r_data), lty=1, col=ls, lwd=4, cex=1.25, 
                                xlab='Years to Event', ylab='Survival');
        r.title(label, cex=3.)
        if ann=='p':
            r.text(0, labels='logrank ' + p, pos=4)
        elif ann != None:
            r.text(0, labels=ann, pos=4)
     
    if assignment is None:
        assignment = np.ones_like(feature)
        name = lambda v: feature.name
    else:
        name = lambda v: str(assignment.name) + ' = ' + str(v)
    if show_legend == 'out':  
        r.par(xpd=True, mar=r.c(4,5,5,8))
    for value in sorted(assignment.ix[feature.index].dropna().unique()):
        plot_me(feature.ix[assignment[assignment==value].index], name(value))

    if show_legend == True:
        mean_s = surv.ix[:,'event'].ix[assignment[assignment==value].index].mean()
        if mean_s < .4:
            r.legend(surv.ix[:,'days'].max() * .05 / 365., .45, labels, 
                     lty=1, col=ls, lwd=3, bty='o')
        else:
            r.legend(surv.ix[:,'days'].max() * .4 / 365, .9, labels, 
                     lty=1, col=ls, lwd=3, bty='o')
    elif show_legend == 'out':
        r.legend(surv.ix[:,'days'].max() * 1.1  / 365, .9, labels, 
                     lty=1, col=ls, lwd=3, bty='o')
    r('dev.off()')
    if show:
        return Show(filename)
In [15]:
stage = clinical.clinical.tumor_stage.map({'stge i': 'Stage I', 'stge ii': 'Stage II', 'stge iii': 'Stage III', 'stge iv': 'Stage IV'})
surv = clinical.survival.survival_5y
In [16]:
f = stage
f.name = 'Overall Survival'
t = get_surv_fit(surv, f)
t.columns = pd.MultiIndex.from_tuples([('','# Patients'), ('','# Deaths'), 
                           ('', 'Median OS'), ('95% Confidence Int.', 'Lower'),
                           ('95% Confidence Int.', 'Upper')])
f = draw_survival_curves(f, surv, colors=['green','blue','orange','red'], labels=list(f.unique()), show=True)
fig_tab(f, t)
Out[16]:
95% Confidence Int.
# Patients # Deaths Median OS Lower Upper
Stage I 241 29 NaN NaN NaN
Stage II 49 6 NaN NaN NaN
Stage III 121 45 4.48 3.21 NaN
Stage IV 76 54 1.78 1.26 3.28

Drugs administered

In [17]:
drugs_types = drugs.keys()
drug_categories = clinical.drugs.drugname.map(drug_map)
drug_given = pd.DataFrame({d: ((drug_categories == d).groupby(level=0).sum() > 0) for d in drugs_types})
In [18]:
fig, axs = subplots(1,2, figsize=(12,4))
crosstab(stage, drug_given.sum(1) > 0)[True].plot(kind='bar', ax=axs[0])
axs[0].set_ylabel('# of Patients')
axs[0].set_title('Patients Receiving Medication By Stage')

drug_given.sum().plot(kind='bar', ax=axs[1]);
axs[1].set_title('Drug Categories');
In [609]:
drug_given.sum()
Out[609]:
Chemo      11
IL2/IF     29
TKI        49
VEGF Ab    11
Vaccine     7
mTORi      12
In [47]:
s = drug_given.ix[stage.index[stage == 'Stage IV']].sum()
s.plot(kind='bar')
ylabel('# of Patients')
Out[47]:
<matplotlib.text.Text at 0x91c0590>
In [48]:
n = drug_given.ix[stage.index[stage == 'Stage IV']].dropna().sum(1).value_counts()[:5]
n.name = 'Number of Medications'
n.plot(kind='bar')
ylabel('# of Patients')
Out[48]:
<matplotlib.text.Text at 0x92f4710>
In [75]:
gc = drug_given.astype(int).astype(str).apply(lambda s: ''.join(s), axis=1)
gc.name = 'drugs'
In [76]:
one_drug = drug_given.ix[stage.index[stage == 'Stage IV']].dropna().sum(1) == 1
In [77]:
vc = drug_given.ix[one_drug[one_drug].index].sum().order()
vc.plot(kind='bar')
ylabel('# of Patients')
Out[77]:
<matplotlib.text.Text at 0x1214f250>
In [78]:
surv = clinical.survival.survival_5y
In [275]:
p = drug_given.ix[one_drug[one_drug].index]['IL2/IF']
clin = clinical.clinical.ix[p[p].index][['tumor_grade','calcium_level','histo_grade','neo_status','hemoglobin','gender','tissuesourcesite','age']]
tf = clinical.timeline.ix[p[p].index].sort(columns='daystodeath').ix[:,:2]
tf.join(clin)
Out[275]:
daystodeath daystolastfollowup tumor_grade calcium_level histo_grade neo_status hemoglobin gender tissuesourcesite age
TCGA-B0-4841 203 139 t2 low g3 with tumor low male b0 63.28
TCGA-BP-4335 460 460 t3a low g3 with tumor low female bp 65.41
TCGA-CJ-4923 572 572 t3a NaN g4 with tumor normal female cj 63.98
TCGA-B8-4143 709 709 t3a elevated g3 with tumor low female b8 66.12
TCGA-B0-4846 1199 1199 t3a normal g2 with tumor normal male b0 52.44
TCGA-CZ-4857 1432 1432 t3a low g3 with tumor low male cz 56.76
TCGA-CW-5580 1964 1964 t3a NaN g3 NaN NaN female cw 73.14
TCGA-BP-5201 NaN 951 t3b low g4 with tumor low male bp 63.36
TCGA-CJ-4871 NaN 2422 t3a normal g4 tumor free low male cj 63.55
TCGA-CJ-5682 NaN 1883 t3a normal g4 with tumor normal male cj 60.94
TCGA-CW-5591 NaN 2270 t3a NaN g2 with tumor NaN male cw 56.75
TCGA-CZ-5460 NaN 1430 t3b low g2 with tumor low male cz 55.87
In [282]:
c = mut.df.ix[:,tf.index].dropna(1).columns
In [283]:
tf.join(clin).ix[c]
Out[283]:
daystodeath daystolastfollowup tumor_grade calcium_level histo_grade neo_status hemoglobin gender tissuesourcesite age
TCGA-CJ-4923 572 572 t3a NaN g4 with tumor normal female cj 63.98
TCGA-B8-4143 709 709 t3a elevated g3 with tumor low female b8 66.12
TCGA-CW-5580 1964 1964 t3a NaN g3 NaN NaN female cw 73.14
TCGA-BP-5201 NaN 951 t3b low g4 with tumor low male bp 63.36
TCGA-CJ-5682 NaN 1883 t3a normal g4 with tumor normal male cj 60.94
TCGA-CW-5591 NaN 2270 t3a NaN g2 with tumor NaN male cw 56.75
TCGA-CZ-5460 NaN 1430 t3b low g2 with tumor low male cz 55.87
In [291]:
p1 = array(['TCGA-CJ-4923','TCGA-B8-4143'])
p2 = array(['TCGA-CW-5580','TCGA-CJ-5682','TCGA-CW-5591','TCGA-CZ-5460'])
In [288]:
pd.Series(np.ones_like(p1), p1)
---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input-288-5bb02331bfab> in <module>()
----> 1 pd.Series(np.ones_like(p1), p1)

NotImplementedError: Not implemented for this type
In [295]:
pat = pd.concat([pd.Series(np.ones(len(p1)), p1), 2.*pd.Series(np.ones(len(p2)), p2)])
pat.name = 'IL2'
draw_survival_curves(pat, surv, show=True)
Out[295]:
In [296]:
pat = pat.map({1: 'bad', 2: 'good'})
In [305]:
p = mut.df.ix[:,pat.index].dropna(axis=1)
In [306]:
good_counts = p.ix[:, pat[pat=='good'].index].dropna(axis=1).sum(1)
good_counts = good_counts[good_counts > 0].order()
In [307]:
bad_counts = p.ix[:, pat[pat=='bad'].index].dropna(axis=1).sum(1)
bad_counts = bad_counts[bad_counts > 0].order()
In [308]:
gb = pd.concat([good_counts, bad_counts], axis=1, keys=['good','bad']).fillna(0)
In [322]:
gb.bad.order().dropna().tail(2)
Out[322]:
ACSBG2    2
BAP1      2
Name: bad
In [357]:
gb.ix[gb.bad.order().dropna().tail(2).index]
Out[357]:
good bad
ACSBG2 0 2
BAP1 1 2
In [263]:
gb[(gb.good - gb.bad).abs() > 2]
Out[263]:
good bad
BIOCARTA_AGR_PATHWAY 3 0
KEGG_TGF_BETA_SIGNALING_PATHWAY 3 0
REACTOME_MEMBRANE_TRAFFICKING 3 0
SIG_PIP3_SIGNALING_IN_CARDIAC_MYOCTES 3 0
ST_INTEGRIN_SIGNALING_PATHWAY 3 0
In [242]:
mut.df.ix['SETD2'].ix[metastatic].value_counts()
Out[242]:
0    28
1     9
In [244]:
pd.crosstab(pd.Series(ones_like(mut.df.columns), mut.df.columns), stage)
Out[244]:
feature Stage I Stage II Stage III Stage IV
row_0
1 140 28 73 37
In [346]:
f = pd.concat([1.*(mut.df.ix['SETD2']), 2.*(mut.df.ix['BAP1']>0)], axis=1).sum(1)
f = f[(f > 0) * (f < 3)]
f.name = 'SETD2 vs. BAP1'
In [356]:
draw_survival_curves(mut.df.ix['ACSBG2'].ix[metastatic], surv, show=True)
Out[356]:
In [352]:
}draw_survival_curves(f.ix[metastatic], surv, show=True, labels=['SETD2','BAP1'])
Out[352]:
In [219]:
(gb.bad - gb.good).order().dropna()
Out[219]:
MUC4      -1
MYH1      -1
SETD2     -1
ABCA13     0
ANKS1B     0
BARD1      0
CDON       0
GPR98      0
PBRM1      0
PKHD1L1    0
POMZP3     0
In [264]:
s = pd.Series({m: anova(pat, vec) for m,vec in rppa.features.iterrows()})
s = s.order()
In [274]:
f = rppa.features.ix[s.index[0], metastatic]
f.name = str(f.name)
draw_survival_curves(f, surv, show=True, q=.5)
Out[274]:
In [265]:
violin_plot_pandas(pat, rppa.features.ix[s.index[0]]);
In [172]:
s
Out[172]:
KEGG_CELL_CYCLE                                                 0.02
BIOCARTA_PPARA_PATHWAY                                          0.03
KEGG_LYSOSOME                                                   0.03
KEGG_NON_HOMOLOGOUS_END_JOINING                                 0.03
KEGG_OOCYTE_MEIOSIS                                             0.03
KEGG_PROGESTERONE_MEDIATED_OOCYTE_MATURATION                    0.03
REACTOME_APCDC20_MEDIATED_DEGRADATION_OF_CYCLIN_B               0.03
REACTOME_APOPTOTIC_EXECUTION_PHASE                              0.03
REACTOME_AUTODEGRADATION_OF_CDH1_BY_CDH1_APC                    0.03
REACTOME_CDC20_PHOSPHO_APC_MEDIATED_DEGRADATION_OF_CYCLIN_A     0.03
REACTOME_CONVERSION_FROM_APC_CDC20_TO_APC_CDH1_IN_LATE_ANAPHASE    0.03
REACTOME_GENES_INVOLVED_IN_APOPTOTIC_CLEAVAGE_OF_CELLULAR_PROTEINS    0.03
REACTOME_GLUCOSE_TRANSPORT                                      0.03
REACTOME_HOST_INTERACTIONS_OF_HIV_FACTORS                       0.03
REACTOME_INACTIVATION_OF_APC_VIA_DIRECT_INHIBITION_OF_THE_APCOMPLEX    0.03
...
SIG_IL4RECEPTOR_IN_B_LYPHOCYTES                    NaN
SIG_INSULIN_RECEPTOR_PATHWAY_IN_CARDIAC_MYOCYTES   NaN
SPEN                                               NaN
ST_ADRENERGIC                                      NaN
ST_B_CELL_ANTIGEN_RECEPTOR                         NaN
ST_DIFFERENTIATION_PATHWAY_IN_PC12_CELLS           NaN
ST_ERK1_ERK2_MAPK_PATHWAY                          NaN
ST_GAQ_PATHWAY                                     NaN
ST_GRANULE_CELL_SURVIVAL_PATHWAY                   NaN
ST_INTERLEUKIN_4_PATHWAY                           NaN
ST_JAK_STAT_PATHWAY                                NaN
ST_PHOSPHOINOSITIDE_3_KINASE_PATHWAY               NaN
ST_STAT3_PATHWAY                                   NaN
ST_TYPE_I_INTERFERON_PATHWAY                       NaN
ST_WNT_CA2_CYCLIC_GMP_PATHWAY                      NaN
Length: 639
In [85]:
split_cols = lambda s: ','.join([d for i,d in enumerate(drug_given.columns) if s[i] == '1'])
t = get_surv_fit(surv, gc[one_drug[one_drug].index])
t.columns = pd.MultiIndex.from_tuples([('','# Patients'), ('','# Deaths'), 
                           ('', 'Median OS'), ('95% Confidence Int.', 'Lower'),
                           ('95% Confidence Int.', 'Upper')])
t.index = map(split_cols, t.index)
t
Out[85]:
95% Confidence Int.
# Patients # Deaths Median OS Lower Upper
mTORi 1 1 0.91 NaN NaN
Vaccine 3 3 1.78 1.57 NaN
TKI 15 9 2.95 0.94 NaN
IL2/IF 12 6 3.92 1.94 NaN
Chemo 1 1 0.25 NaN NaN
In [84]:
draw_survival_curves(gc[one_drug[one_drug].index], surv, colors=['red','orange','green','purple','blue','yellow'], 
                     labels=[c for c in drug_given.columns if vc[c] > 0][::-1], show=True, show_legend='out')
Out[84]:
In [98]:
gc.value_counts()
Out[98]:
001000    31
010000    16
001001     5
000010     3
011000     2
110100     2
101100     2
000001     2
000000     2
110000     2
001010     1
111010     1
011110     1
010001     1
001101     1
001100     1
111101     1
111100     1
111110     1
000101     1
011001     1
100000     1
In [151]:
drug_lists = gc.apply(lambda s: ','.join([d for i,d in enumerate(drug_given.columns) if s[i] == '1']))
In [58]:
pd.crosstab(drug_lists, stage).ix[1:].T.plot(kind='bar')
Out[58]:
<matplotlib.axes.AxesSubplot at 0x95dc210>
In [194]:
mut = cancer.load_data('MAF')
mut.uncompress()
meth = cancer.load_data('Methylation')
cn = cancer.load_data('CN_broad')
cn.uncompress()
rna = cancer.load_data('mRNASeq')
rppa  = cancer.load_data('RPPA')
In [47]:
vhl_mut = mut.df.ix['VHL']
vhl_mut.name = 'VHL_mut'
vhl_meth = meth.df.ix['VHL']
vhl_meth.name = 'VHL_meth'
vhl_rna = rna.df.ix['VHL']
vhl_rna.name = 'VHL_rna'
In [20]:
cdk_del = cn.df.ix['Deletion'].ix['9p21.3'].ix[0]
cdk_del.name = 'del_band'
In [21]:
draw_survival_curves_split(cdk_del, clinical.clinical.tumor_stage, surv, ann='p', show=True)
Out[21]:
In [22]:
draw_survival_curves_split(vhl_mut, stage, surv, ann='p', q=.25, show=True)
Out[22]:
In [272]:
draw_survival_curves(f, surv, ann='p', show=True)
Out[272]:

Mutations in Stage 4

In [35]:
met = (mut.df.ix[:,metastatic] > 0).sum(1).order()
met = met[met>2]
g = (mut.df > 0).sum(1).order()
g = g.ix[met.index]
In [39]:
m = pd.concat([met, g-met, g], keys=['Metastatic','Non-Metastatic', 'All'], axis=1)
In [ ]:
pd.crosstab(
In [42]:
m
Out[42]:
Metastatic Non-Metastatic All
Hugo_Symbol
AASS 3 2 5
ABCA13 3 9 12
ADAMTS19 3 4 7
ANKRD32 3 0 3
ATP1A1 3 2 5
CACNA1C 3 4 7
COL22A1 3 2 5
DNAH3 3 6 9
DNAH8 3 7 10
FBN2 3 7 10
FRY 3 4 7
HERC2 3 4 7
IGSF1 3 3 6
KIAA1524 3 0 3
MLL3 3 16 19
MTOR 3 19 22
MYH1 3 3 6
NBPF9 3 4 7
NRXN1 3 6 9
PCLO 3 14 17
PKHD1L1 3 8 11
POMZP3 3 0 3
PRKDC 3 2 5
RELN 3 0 3
SYNE1 3 10 13
SYNE2 3 4 7
GPR98 4 12 16
MUC16 4 28 32
PDXDC1 4 1 5
PRUNE2 4 3 7
NBPF10 5 13 18
BAP1 9 16 25
MUC4 9 27 36
SETD2 9 24 33
TTN 11 58 69
PBRM1 16 89 105
VHL 18 120 138
In [25]:
g = (mut.df.ix[:,metastatic] > 0).sum(1).order()
g = g[g>2]
g.plot(kind='bar')
ylabel('# of Patients')
Out[25]:
<matplotlib.text.Text at 0x619be50>
In [29]:
g
Out[29]:
Hugo_Symbol
AASS            3
ABCA13          3
ADAMTS19        3
ANKRD32         3
ATP1A1          3
CACNA1C         3
COL22A1         3
DNAH3           3
DNAH8           3
FBN2            3
FRY             3
HERC2           3
IGSF1           3
KIAA1524        3
MLL3            3
MTOR            3
MYH1            3
NBPF9           3
NRXN1           3
PCLO            3
PKHD1L1         3
POMZP3          3
PRKDC           3
RELN            3
SYNE1           3
SYNE2           3
GPR98           4
MUC16           4
PDXDC1          4
PRUNE2          4
NBPF10          5
BAP1            9
MUC4            9
SETD2           9
TTN            11
PBRM1          16
VHL            18
In [411]:
def pathway_plot(df, clip=True, ax=None):
    df = df.ix[df.sum(1) > 0, df.sum() > 0]
    df = df.ix[df.sum(1).order(ascending=False).index]
    o = sort(df.apply(lambda s: ''.join(map(str, s)))).index[::-1]
    df = df[o]
    
    if (df.shape[0] > 20) and clip:
        rest = Series(df.ix[10:].sum().clip_upper(1.), name='rest')
        df = df.ix[:10]
        df = df.append(rest)
    if ax is None:
        fig, ax = plt.subplots(figsize=(df.shape[1]*.2,df.shape[0]*.5))
    else:
        fig = ax.get_figure()
    memo_plot(df, ax=ax)
    ax.bar(arange(len(df.columns)) - .3, df.sum() / df.sum().max(), bottom=-1.5, 
           width=.6, alpha=.5)
    counts = df.sum(1)[::-1]
    width = df.shape[1]
    ax.barh(arange(len(counts)) - .3, (counts / counts.max())*width*.25, left=width - .2, 
            height=.6, alpha=.5)
    ax.set_frame_on(False)
    ax.tick_params(right='off')
    fig.tight_layout()
In [421]:
pathway_plot(mut.df.ix[g[g>3].index,metastatic], False)
In [438]:
counts = mut.df.ix[:,metastatic].sum(1)
counts.name ='count'
In [439]:
#t = run_feature_matrix(mut.df.ix[g[g>3].index,metastatic], test)
t.join(counts).sort(columns=[('Full','LR')])
Out[439]:
(Full, LR) (Full, LR_q) (Full, fmla) (Univariate, hazzard) (Univariate, p) (Univariate, q) count
SETD2 0.00 0.01 Surv(days, event) ~ feature + mutation__rate_n... 0.36 0.10 0.65 9
VHL 0.01 0.05 Surv(days, event) ~ feature + mutation__rate_n... 0.64 0.31 0.65 18
PRUNE2 0.05 0.14 Surv(days, event) ~ feature + mutation__rate_n... 0.27 0.21 0.65 4
GPR98 0.06 0.14 Surv(days, event) ~ feature + age + feature:age\n 1.75 0.37 0.65 4
TTN 0.06 0.14 Surv(days, event) ~ feature + mutation__rate_n... 1.26 0.31 0.65 18
NBPF10 0.37 0.68 Surv(days, event) ~ feature + mutation__rate_n... 1.53 0.25 0.65 6
BAP1 NaN NaN NaN 1.13 0.79 0.98 9
MUC16 NaN NaN NaN 0.98 0.98 0.98 5
MUC4 NaN NaN NaN 0.96 0.83 0.98 17
PDXDC1 NaN NaN NaN 0.97 0.97 0.98 4
PBRM1 1.00 1.00 Surv(days, event) ~ 1\n 1.38 0.42 0.65 17
In [395]:
233 + 16 + 4 + 74
Out[395]:
327
In [ ]:
233 + 16 + 4 + 74
In [404]:
surv = clinical.survival.survival
In [406]:
f = (mut.features.ix['PBRM1']>0) + (2.*(mut.features.ix['BAP1']>0))
f.name = 'f'
draw_survival_curves(f, surv, stage, ann='p', colors=['green','blue','red', 'purple'], show=True, show_legend=False)
Out[406]:
In [271]:
f = mut.features.ix['WNT_SIGNALING']
draw_survival_curves(f, surv, stage, ann='p', show=True)
Out[271]:
In [24]:
metastatic = stage[stage == 'Stage IV'].index
In [424]:
survival_test = 'survival_5y'
covariates = ['age', ('mutation', 'rate_non')]
cov_df = global_vars.join(clinical.clinical, how='outer').join(cdk_del)
cov_df = cov_df[covariates]
remerge = lambda s: '__'.join(s) if type(s) != str else s
cov_df = cov_df.rename(columns=remerge)
surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True
In [425]:
df = mut.features.ix[:,stage[stage.isin(['Stage IV'])].index]
df = df.dropna(axis=1)
counts = Series(df.sum(1), name='counts')
df  = df[counts > 6]
In [227]:
#del get_cox_ph_ms.null_model
mut_met = run_feature_matrix(df, test)
mut_met = mut_met.join(counts).sort(columns=[('Full','LR')])
In [378]:
mut_met.ix['PBRM1']
Out[378]:
(Full, LR)                 NaN
(Full, LR_q)               NaN
(Full, fmla)               NaN
(Univariate, hazzard)     1.27
(Univariate, p)          0.581
(Univariate, q)          0.972
counts                      16
Name: PBRM1
In [377]:
mut_met.head(25).astype(object)
Out[377]:
(Full, LR) (Full, LR_q) (Full, fmla) (Univariate, hazzard) (Univariate, p) (Univariate, q) counts
WNT_SIGNALING 0.000552 0.049 Surv(days, event) ~ feature + age\n 6.17 0.000285 0.0504 8
REACTOME_HOST_INTERACTIONS_OF_HIV_FACTORS 0.000554 0.049 Surv(days, event) ~ feature * mutation__rate_n... 1.39 0.452 0.972 12
SETD2 0.00104 0.0614 Surv(days, event) ~ feature + mutation__rate_n... 0.359 0.101 0.94 9
REACTOME_COSTIMULATION_BY_THE_CD28_FAMILY 0.00796 0.264 Surv(days, event) ~ feature + age + feature:age\n 2.76 0.0387 0.893 7
VHL 0.00953 0.264 Surv(days, event) ~ feature + mutation__rate_n... 0.644 0.312 0.972 18
REACTOME_CLASS_A1_RHODOPSIN_LIKE_RECEPTORS 0.0106 0.264 Surv(days, event) ~ feature + mutation__rate_n... 1.93 0.127 0.972 17
REACTOME_APOPTOSIS 0.0124 0.264 Surv(days, event) ~ feature + mutation__rate_n... 2.71 0.0207 0.893 12
KEGG_FOCAL_ADHESION 0.0134 0.264 Surv(days, event) ~ feature * mutation__rate_n... 1.77 0.263 0.972 26
REACTOME_METABOLISM_OF_AMINO_ACIDS 0.0136 0.264 Surv(days, event) ~ feature + age + feature:age\n 2.05 0.0961 0.94 13
KEGG_GAP_JUNCTION 0.0149 0.264 Surv(days, event) ~ feature + mutation__rate_n... 1.56 0.336 0.972 10
KEGG_LONG_TERM_POTENTIATION 0.0173 0.278 Surv(days, event) ~ feature + age + feature:age\n 2.42 0.0415 0.893 15
KEGG_CARDIAC_MUSCLE_CONTRACTION 0.0197 0.291 Surv(days, event) ~ feature + age + feature:age\n 0.6 0.288 0.972 15
KEGG_REGULATION_OF_ACTIN_CYTOSKELETON 0.0227 0.3 Surv(days, event) ~ feature * mutation__rate_n... 1.67 0.268 0.972 23
REACTOME_APOPTOTIC_EXECUTION_PHASE 0.026 0.3 Surv(days, event) ~ feature + mutation__rate_n... 3.74 0.00281 0.249 10
KEGG_WNT_SIGNALING_PATHWAY 0.027 0.3 Surv(days, event) ~ feature + mutation__rate_n... 2.15 0.0765 0.893 12
REACTOME_PROCESSING_OF_CAPPED_INTRON_CONTAINING_PRE_MRNA 0.0271 0.3 Surv(days, event) ~ feature + mutation__rate_n... 1.84 0.172 0.972 19
KEGG_LONG_TERM_DEPRESSION 0.0367 0.382 Surv(days, event) ~ feature + mutation__rate_n... 2.15 0.0807 0.893 13
REACTOME_GPCR_LIGAND_BINDING 0.0489 0.439 Surv(days, event) ~ feature + age + feature:age\n 1.8 0.187 0.972 21
REACTOME_GENES_INVOLVED_IN_APOPTOTIC_CLEAVAGE_OF_CELLULAR_PROTEINS 0.0585 0.439 Surv(days, event) ~ feature + mutation__rate_n... 3.37 0.00676 0.399 9
REACTOME_CELL_JUNCTION_ORGANIZATION 0.059 0.439 Surv(days, event) ~ feature * mutation__rate_n... 1.69 0.238 0.972 12
REACTOME_CELL_SURFACE_INTERACTIONS_AT_THE_VASCULAR_WALL 0.0601 0.439 Surv(days, event) ~ feature + mutation__rate_n... 1.69 0.226 0.972 12
KEGG_OOCYTE_MEIOSIS 0.0613 0.439 Surv(days, event) ~ feature + mutation__rate_n... 2.24 0.0666 0.893 12
BIOCARTA_P53HYPOXIA_PATHWAY 0.0615 0.439 Surv(days, event) ~ feature + mutation__rate_n... 2.49 0.0748 0.893 7
KEGG_ENDOCYTOSIS 0.0659 0.439 Surv(days, event) ~ feature + mutation__rate_n... 0.741 0.492 0.972 16
KEGG_TOLL_LIKE_RECEPTOR_SIGNALING_PATHWAY 0.0664 0.439 Surv(days, event) ~ feature + mutation__rate_n... 2.37 0.0652 0.893 8
In [253]:
import Reports.Figures as F
reload(F)
from Reports.Figures import *
In [230]:
from Reports.NotebookTools import *
In [275]:
def draw_me(f):
    split_by_stage = draw_survival_curves(mut.features.ix[f], surv, stage, ann='p', show=True)
    all_surv = draw_survival_curves(mut.features.ix[f], surv, ann='p', show=True)
    curves = draw_survival_curves(mut.features.ix[f, df.columns], surv, ann='p', filename='tmp.png', show=True)
    try:
        figsize=(6,4)
        pathway_plot(mut.df.ix[run.gene_sets[f], df.columns], plt.gca())
        plt.tight_layout()
        plt.savefig('tmp1.png', dpi=75, bbox_inches=0, pad_inches=0)
        plt.close('all')
        return stack([side_by_side(['tmp.png', 'tmp1.png']), split_by_stage, all_surv])
    except:
        return stack([curves, split_by_stage, all_surv])
    
s = stack([draw_me(f) for f in mut_met.index[:15]])
s
Out[275]:

In [336]:
df = cn.features.ix[:,stage[stage.isin(['Stage IV'])].index]
df = df.dropna(axis=1)
counts = Series((df != 0).sum(1), name='counts')
df  = df[counts > 5]
In [337]:
survival_test = 'survival_5y'
covariates =  ['age', ('cna', 'chrom_instability')]
cov_df = global_vars.join(clinical.clinical, how='outer').join(cdk_del)
cov_df = cov_df[covariates]
remerge = lambda s: '__'.join(s) if type(s) != str else s
cov_df = cov_df.rename(columns=remerge)
surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True
In [262]:
cna_met = run_feature_matrix(df, test)
cna_met = cna_met.join(counts).sort(columns=[('Full','LR_q')])
In [264]:
cna_met.head(10)
Out[264]:
(Full, LR) (Full, LR_q) (Full, fmla) (Univariate, hazzard) (Univariate, p) (Univariate, q) counts
Deletion 3p25.3 Lesion 0.00 0.18 Surv(days, event) ~ feature + cna__chrom_insta... 0.66 0.49 1.00 69
13q13.3 Lesion 0.01 0.18 Surv(days, event) ~ age + feature + cna__chrom... 0.41 0.00 0.16 19
Amplification 1q24.1 Lesion 0.03 0.44 Surv(days, event) ~ feature\n 1.94 0.02 0.52 11
5q22.1 (CAMK4, EPB41L4A, SLC25A46, STARD4, TMEM232, WDR36) 0.05 0.44 Surv(days, event) ~ age + feature + cna__chrom... 0.83 0.44 1.00 42
Xp11.4 Lesion 0.06 0.44 Surv(days, event) ~ feature\n 0.46 0.11 1.00 9
Deletion 10q23.31 Lesion 0.08 0.44 Surv(days, event) ~ feature + cna__chrom_insta... 0.59 0.09 1.00 19
4q34.3 Lesion 0.09 0.44 Surv(days, event) ~ age * feature + age * cna_... 0.78 0.40 1.00 21
Amplification 5q33.2 (CNOT8, FAM114A2, GEMIN5, LARP1, MFAP3, MRPL22, SAP30L) 0.09 0.44 Surv(days, event) ~ feature + cna__chrom_insta... 0.83 0.33 1.00 52
5q33.3 (ADAM19, C5orf54, CLINT1, CYFIP2, FABP6, HAVCR1, ITK, LSM11, MED7, PWWP2A, RNF145, SLU7, THG1L, TIMD4, TTC1, UBLCP1) 0.09 0.44 Surv(days, event) ~ feature + cna__chrom_insta... 0.83 0.33 1.00 52
Xq11.2 Lesion 0.10 0.44 Surv(days, event) ~ feature\n 0.43 0.15 1.00 7
In [44]:
from Data.Firehose import get_gistic_gene_matrix
In [45]:
cn_genes = get_gistic_gene_matrix(run.data_path, cancer.name)
vhl_cn = cn_genes.xs('VHL', level=2).ix[0]
vhl_cn.name = 'VHL_cn'
In [62]:
pd.crosstab([vhl_cn, vhl_mut], stage).ix[-1].T.plot(kind='bar')
Out[62]:
<matplotlib.axes.AxesSubplot at 0x1308c0d0>
In [71]:
f = vhl_cn[vhl_cn == -1].index
f=  vhl_mut.ix[f].dropna()
In [72]:
vhl_mut.shape
Out[72]:
(278,)
In [73]:
f.shape
Out[73]:
(234,)
In [68]:
draw_survival_curves(vhl_mut, surv, stage, show=True)
Out[68]:
In [67]:
draw_survival_curves(f, surv, stage, show=True)
Out[67]:
In [59]:
vhl_cn.shape
Out[59]:
(493,)
In [57]:
pd.crosstab(vhl_cn, stage)
Out[57]:
feature Stage I Stage II Stage III Stage IV
VHL_cn
-2 28 5 9 12
-1 177 37 108 57
0 28 6 3 4
1 1 1 0 2
In [338]:
def draw_me(f):
    feature = cn.features.ix[f, df.columns]
    feature.name = str(feature.name)
    
    labels = Series({-2: 'Homozygous Deletion', -1: 'Deletion', 0: 'Normal', 1: 'Amp', 2: 'High Amp'})
    colors = Series({-2: 'black', -1: 'purple', 0: 'blue', 1: 'orange', 2: 'red'})
    curves = draw_survival_curves(feature, surv, colors=colors[sorted(feature.unique())].tolist(), 
                                  labels=labels[sorted(feature.unique())].tolist(), show=True, show_legend=True,
                                  ann='p')
    feature = cn.features.ix[f]
    feature.name = str(feature.name)
    split_by_stage = draw_survival_curves(feature, surv, stage, colors=colors[sorted(feature.unique())].tolist(), 
                                  labels=labels[sorted(feature.unique())].tolist(), show=True, show_legend=True,
                                  ann='p')
    all_surv = draw_survival_curves(feature, surv, colors=colors[sorted(feature.unique())].tolist(), 
                                  labels=labels[sorted(feature.unique())].tolist(), show=True, show_legend=True,
                                  ann='p')
    return stack([curves, split_by_stage, all_surv])

s = stack([draw_me(f) for f in cna_met.index[:10]])
s
Out[338]:

In [880]:
crosstab(f, stage)
Out[880]:
feature Stage I Stage II Stage III
('Deletion', '9p21.3', ('CDKN2A', 'CDKN2B'))
-2 0 1 3
-1 49 9 47
0 181 37 67
1 4 2 3
In [888]:
f = cn.features.ix[il2s.index[4], df.columns]
f.name = str(f.name)
labels = Series({-2: 'Homozygous Deletion', -1: 'Deletion', 0: 'Normal', 1: 'Amp', 2: 'High Amp'})
colors = Series({-2: 'black', -1: 'purple', 0: 'blue', 1: 'orange', 2: 'red'})
draw_survival_curves(f, surv, stage, colors=colors[sorted(f.unique())].tolist(), 
                     labels=labels[sorted(f.unique())].tolist(), show=True, show_legend=True)
Out[888]:
In [344]:
survival_test = 'survival_5y'
covariates =  ['age']
cov_df = global_vars.join(clinical.clinical, how='outer').join(cdk_del)
cov_df = cov_df[covariates]
remerge = lambda s: '__'.join(s) if type(s) != str else s
cov_df = cov_df.rename(columns=remerge)
surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True
In [345]:
df = rppa.features.ix[:,stage[stage.isin(['Stage IV'])].index]
df = df.dropna(axis=1)
counts = Series((df != 0).sum(1), name='counts')
In [346]:
rppa_met = run_feature_matrix(df, test)
rppa_met = rppa_met.join(counts).sort(columns=[('Full','LR')])
In [347]:
rppa_met.head(10)
Out[347]:
(Full, LR) (Full, LR_q) (Full, fmla) (Univariate, hazzard) (Univariate, p) (Univariate, q) counts
phos_pc PRKAA1 2.54e-04 0.04 Surv(days, event) ~ age * feature\n 1.36e-04 8.98e-03 0.14 73
protiens (PRKAA1, AMPK_pT172-R-V) 3.10e-04 0.04 Surv(days, event) ~ age * feature\n 5.05e-01 8.39e-03 0.14 73
(AR, AR-R-V) 3.43e-04 0.04 Surv(days, event) ~ feature\n 3.57e-01 4.10e-04 0.05 73
(NFKB1, NF-kB-p65_pS536-R-C) 4.81e-04 0.04 Surv(days, event) ~ feature\n 5.08e-01 4.99e-04 0.05 73
(GATA3, GATA3-M-V) 1.32e-03 0.08 Surv(days, event) ~ feature\n 5.18e+00 1.52e-04 0.05 73
(RPS6, S6-R-NA) 2.35e-03 0.12 Surv(days, event) ~ feature\n 2.79e+00 1.44e-03 0.08 73
(PEA15, PEA-15-R-V) 2.59e-03 0.12 Surv(days, event) ~ feature\n 3.84e+00 1.69e-03 0.08 73
(STK11, LKB1-M-NA) 3.24e-03 0.12 Surv(days, event) ~ feature\n 2.84e+01 2.06e-03 0.08 73
phos_pc ERBB3 3.30e-03 0.12 Surv(days, event) ~ feature\n 1.77e-06 4.74e-03 0.14 73
pathways BIOCARTA_FAS_PATHWAY 4.05e-03 0.13 Surv(days, event) ~ feature\n 3.03e-04 2.43e-03 0.09 73
In [365]:
s = rppa.features.ix[rppa_met.index[:10]].T
s.columns = pd.Index(map(str, s.columns))
In [ ]:
pd.scatter_matrix(
In [353]:
series_scatter(rppa.features.ix[rppa_met.index[0], df.columns], rppa.features.ix[rppa_met.index[4], df.columns])
In [348]:
def draw_me(f):
    feature = rppa.features.ix[f, df.columns]
    feature.name = str(feature.name)
   
    curves = draw_survival_curves(feature, surv, show=True, show_legend=True, ann='p')
    feature = rppa.features.ix[f]
    feature.name = str(feature.name)
    split_by_stage = draw_survival_curves(feature, surv, stage, show=True, show_legend=True, ann='p')
    all_surv = draw_survival_curves(feature, surv, show=True, show_legend=True, ann='p')
    return stack([curves, split_by_stage, all_surv])

s = stack([draw_me(f) for f in rppa_met.index[:10]])
s
Out[348]:

In [358]:
global_vars
Out[358]:
<class 'pandas.core.frame.DataFrame'>
Index: 486 entries, TCGA-A3-3306 to TCGA-EU-5907
Data columns:
('mRNASeq', 'pc1')                 465  non-null values
('mRNASeq', 'pc2')                 465  non-null values
('methylation', 'pc1')             271  non-null values
('methylation', 'pc2')             271  non-null values
('cna', 'chrom_instability')       478  non-null values
('cna', 'gene_amp')                478  non-null values
('cna', 'gene_amp_high')           478  non-null values
('cna', 'gene_del')                478  non-null values
('cna', 'gene_del_homo')           478  non-null values
('cna', 'lesion_amp')              478  non-null values
('cna', 'lesion_amp_high')         478  non-null values
('cna', 'lesion_del')              478  non-null values
('cna', 'lesion_del_homo')         478  non-null values
('mutation', 'rate_dbsnp')         278  non-null values
('mutation', 'rate_sil')           278  non-null values
('mutation', 'rate_non')           278  non-null values
('mutation', '*CpG->T')            278  non-null values
('mutation', '*ApG->G')            278  non-null values
('mutation', '*Np(A_C_T)->transit')278  non-null values
('mutation', 'transver')           278  non-null values
('mutation', 'indel+null')         278  non-null values
('mutation', 'double_null')        278  non-null values
dtypes: float64(22)

Methylation

In [359]:
survival_test = 'survival_5y'
covariates =  ['age', ('methylation', 'pc1')]
cov_df = global_vars.join(clinical.clinical, how='outer').join(cdk_del)
cov_df = cov_df[covariates]
remerge = lambda s: '__'.join(s) if type(s) != str else s
cov_df = cov_df.rename(columns=remerge)
surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True
In [360]:
df = meth.features.ix[:,stage[stage.isin(['Stage IV'])].index]
df = df.dropna(axis=1)
meth_met = run_feature_matrix(df, test)
meth_met = meth_met.sort(columns=[('Full','LR')])
In [363]:
meth_met.head(10)
Out[363]:
Full Univariate
LR LR_q fmla hazzard p q
BIOCARTA_GLYCOLYSIS_PATHWAY 1.30e-04 0.03 Surv(days, event) ~ feature + methylation__pc1... 1.28e+05 2.15e-04 0.01
BIOCARTA_CDMAC_PATHWAY 2.59e-04 0.03 Surv(days, event) ~ feature\n 1.43e+05 3.53e-04 0.02
REACTOME_SHC_MEDIATED_SIGNALLING 6.21e-04 0.04 Surv(days, event) ~ feature + age + feature:age\n 1.16e+04 2.84e-05 0.01
REACTOME_GRB2_EVENTS_IN_EGFR_SIGNALING 1.12e-03 0.05 Surv(days, event) ~ feature + age + feature:age\n 6.65e+03 7.16e-05 0.01
REACTOME_AKT_PHOSPHORYLATES_TARGETS_IN_THE_CYTOSOL 1.20e-03 0.05 Surv(days, event) ~ feature\n 1.76e+04 1.29e-03 0.04
REACTOME_MTORC1_MEDIATED_SIGNALLING 1.56e-03 0.05 Surv(days, event) ~ feature\n 7.58e+03 1.16e-03 0.04
REACTOME_ACTIVATED_TAK1_MEDIATES_P38_MAPK_ACTIVATION 2.33e-03 0.07 Surv(days, event) ~ feature + methylation__pc1... 5.81e+04 9.17e-03 0.12
REACTOME_GAMMA_CARBOXYLATION_TRANSPORT_AND_AMINO_TERMINAL_CLEAVAGE_OF_PROTEINS 3.37e-03 0.08 Surv(days, event) ~ feature\n 7.43e-06 3.48e-03 0.09
BIOCARTA_BARR_MAPK_PATHWAY 5.88e-03 0.12 Surv(days, event) ~ feature\n 2.81e+03 5.75e-03 0.11
REACTOME_RNA_POLYMERASE_III_CHAIN_ELONGATION 6.31e-03 0.12 Surv(days, event) ~ feature\n 2.87e+03 7.93e-03 0.12
In [442]:
n = meth_met.index[0]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
BIOCARTA_GLYCOLYSIS_PATHWAY
Out[442]:
In [443]:
n = meth_met.index[1]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
BIOCARTA_CDMAC_PATHWAY
Out[443]:
In [444]:
n = meth_met.index[2]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
REACTOME_SHC_MEDIATED_SIGNALLING
Out[444]:
In [445]:
n = meth_met.index[3]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
REACTOME_GRB2_EVENTS_IN_EGFR_SIGNALING
Out[445]:
In [446]:
n = meth_met.index[4]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
REACTOME_AKT_PHOSPHORYLATES_TARGETS_IN_THE_CYTOSOL
Out[446]:
In [447]:
n = meth_met.index[5]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
REACTOME_MTORC1_MEDIATED_SIGNALLING
Out[447]:
In [448]:
n = meth_met.index[6]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
REACTOME_ACTIVATED_TAK1_MEDIATES_P38_MAPK_ACTIVATION
Out[448]:
In [449]:
n = meth_met.index[7]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
REACTOME_GAMMA_CARBOXYLATION_TRANSPORT_AND_AMINO_TERMINAL_CLEAVAGE_OF_PROTEINS
Out[449]:
In [450]:
n = meth_met.index[8]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
BIOCARTA_BARR_MAPK_PATHWAY
Out[450]:
In [451]:
n = meth_met.index[8]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
BIOCARTA_BARR_MAPK_PATHWAY
Out[451]:
In [452]:
n = meth_met.index[9]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
REACTOME_RNA_POLYMERASE_III_CHAIN_ELONGATION
Out[452]:
In [375]:
def draw_me(f):
    feature = meth.features.ix[f, df.columns]
    feature.name = str(feature.name)
   
    curves = draw_survival_curves(feature, surv, show=True, show_legend=True, ann='p')
    feature = meth.features.ix[f]
    feature.name = str(feature.name)
    split_by_stage = draw_survival_curves(feature, surv, stage, show=True, show_legend=True, ann='p')
    pathway = Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, f))
    all_surv = draw_survival_curves(feature, surv, show=True, show_legend=True, ann='p')
    return stack([curves, split_by_stage, all_surv])

s = stack([draw_me(f) for f in meth_met.index[:10]])
s
Out[375]:

In [378]:
pathway = Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, meth_met.index[5]))
pathway
Out[378]:

Expression

In [415]:
survival_test = 'survival_5y'
covariates =  ['age']
cov_df = global_vars.join(clinical.clinical, how='outer').join(cdk_del)
cov_df = cov_df[covariates]
remerge = lambda s: '__'.join(s) if type(s) != str else s
cov_df = cov_df.rename(columns=remerge)
surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True
In [416]:
df = rna.features.ix[:,stage[stage.isin(['Stage IV'])].index]
df = df.dropna(axis=1)
rna_met = run_feature_matrix(df, test)
rna_met = rna_met.sort(columns=[('Full','LR')])
In [427]:
rna_met.head(10)
Out[427]:
Full Univariate
LR LR_q fmla hazzard p q
KEGG_GLYCINE_SERINE_AND_THREONINE_METABOLISM 2.81e-04 0.04 Surv(days, event) ~ feature\n 9.73e-06 7.55e-05 0.02
KEGG_PRIMARY_BILE_ACID_BIOSYNTHESIS 3.14e-04 0.04 Surv(days, event) ~ feature\n 2.69e-06 1.90e-04 0.02
REACTOME_HDL_MEDIATED_LIPID_TRANSPORT 3.84e-04 0.04 Surv(days, event) ~ feature\n 2.17e-06 1.20e-04 0.02
KEGG_HISTIDINE_METABOLISM 4.06e-04 0.04 Surv(days, event) ~ feature\n 1.35e-05 1.61e-04 0.02
KEGG_RENIN_ANGIOTENSIN_SYSTEM 6.71e-04 0.06 Surv(days, event) ~ feature\n 1.59e-05 2.66e-04 0.02
KEGG_PENTOSE_AND_GLUCURONATE_INTERCONVERSIONS 9.09e-04 0.06 Surv(days, event) ~ feature\n 1.15e-05 3.45e-04 0.02
REACTOME_GLUTATHIONE_CONJUGATION 1.10e-03 0.06 Surv(days, event) ~ feature\n 4.83e-05 5.89e-04 0.03
KEGG_ASCORBATE_AND_ALDARATE_METABOLISM 1.45e-03 0.07 Surv(days, event) ~ age * feature\n 1.36e-05 2.15e-04 0.02
REACTOME_BOTULINUM_NEUROTOXICITY 2.20e-03 0.07 Surv(days, event) ~ feature\n 2.69e+04 2.31e-03 0.06
REACTOME_LOSS_OF_NLP_FROM_MITOTIC_CENTROSOMES 2.31e-03 0.07 Surv(days, event) ~ feature\n 3.36e+04 3.64e-03 0.07
In [418]:
def draw_me(f):
    feature = rna.features.ix[f, df.columns]
    feature.name = str(feature.name)
   
    curves = draw_survival_curves(feature, surv, show=True, show_legend=True, ann='p')
    feature = rna.features.ix[f]
    feature.name = str(feature.name)
    split_by_stage = draw_survival_curves(feature, surv, stage, show=True, show_legend=True, ann='p')
    #pathway = Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, f))
    all_surv = draw_survival_curves(feature, surv, show=True, show_legend=True, ann='p')
    return stack([curves, split_by_stage, all_surv])

s = stack([draw_me(f) for f in rna_met.index[:10]])
s
Out[418]:

In [429]:
pathway = Image(filename='{}/Figures/PathwayPlots/{}.png'.format(rna.path, rna_met.index[1]))
pathway
Out[429]: